Deep Learning: A Simple Example

  • Let’s get back to the Name Gender Classifier.

Prepare Data

import numpy as np
import nltk
from nltk.corpus import names
import random
# nltk.download('names')  # run once if the names corpus is not yet available
# label: 1 = male, 0 = female
labeled_names = ([(name, 1) for name in names.words('male.txt')] +
                 [(name, 0) for name in names.words('female.txt')])
random.shuffle(labeled_names)

Train-Test Split

from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(labeled_names, test_size = 0.2, random_state=42)
print(len(train_set), len(test_set))
6355 1589

Feature Engineering

  • In deep learning, words or characters are converted into numeric representations that are learned during model training.

  • In other words, the feature engineering step is largely automatic.

  • Steps:

    • Text to Integers

    • Padding each instance to the same length (see the sketch below)
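
A minimal sketch of these two steps on toy data, using the same Keras utilities as below (the exact integer codes depend on character frequencies):

from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence

toy_names = ['Ann', 'Jo']
toy_tok = Tokenizer(char_level=True)
toy_tok.fit_on_texts(toy_names)
toy_ints = toy_tok.texts_to_sequences(toy_names)   # e.g., [[2, 1, 1], [3, 4]]
sequence.pad_sequences(toy_ints, maxlen=4)         # zero-padded on the left to length 4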

import tensorflow as tf
import tensorflow.keras as keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils import to_categorical, plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import SpatialDropout1D
names = [n for (n, l) in train_set]    # note: this shadows the nltk `names` corpus imported above
labels = [l for (n, l) in train_set]
len(names)
6355

Tokenizer

tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(names)

Text to Sequences

names_ints = tokenizer.texts_to_sequences(names)
print(names[:10])
print(names_ints[:10])
print(labels[:10])
['Kimberlyn', 'Juan', 'Mariska', 'Rudd', 'Kitty', 'Janos', 'Aryn', 'Ana', 'Winny', 'Chevalier']
[[18, 3, 12, 15, 2, 5, 6, 11, 4], [19, 16, 1, 4], [12, 1, 5, 3, 9, 18, 1], [5, 16, 10, 10], [18, 3, 8, 8, 11], [19, 1, 4, 7, 9], [1, 5, 11, 4], [1, 4, 1], [23, 3, 4, 4, 11], [14, 13, 2, 20, 1, 6, 3, 2, 5]]
[0, 1, 0, 1, 0, 1, 0, 0, 0, 1]

Vocabulary

# determine the vocabulary size
# (+1 because index 0 is reserved for padding and never appears in word_index)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
Vocabulary Size: 30
tokenizer.word_index
{'a': 1,
 'e': 2,
 'i': 3,
 'n': 4,
 'r': 5,
 'l': 6,
 'o': 7,
 't': 8,
 's': 9,
 'd': 10,
 'y': 11,
 'm': 12,
 'h': 13,
 'c': 14,
 'b': 15,
 'u': 16,
 'g': 17,
 'k': 18,
 'j': 19,
 'v': 20,
 'f': 21,
 'p': 22,
 'w': 23,
 'z': 24,
 'x': 25,
 'q': 26,
 '-': 27,
 ' ': 28,
 "'": 29}

Padding

names_lens = [len(n) for n in names_ints]
names_lens
import seaborn as sns
sns.displot(names_lens)
print(names[np.argmax(names_lens)]) # longest name
Jean-Christophe
../_images/dl-simple-case_22_1.png
max_len = names_lens[np.argmax(names_lens)]
max_len
15
names_ints_pad = sequence.pad_sequences(names_ints, maxlen = max_len)
names_ints_pad[:10]
array([[ 0,  0,  0,  0,  0,  0, 18,  3, 12, 15,  2,  5,  6, 11,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 19, 16,  1,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 12,  1,  5,  3,  9, 18,  1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5, 16, 10, 10],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 18,  3,  8,  8, 11],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 19,  1,  4,  7,  9],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  5, 11,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 23,  3,  4,  4, 11],
       [ 0,  0,  0,  0,  0,  0, 14, 13,  2, 20,  1,  6,  3,  2,  5]],
      dtype=int32)
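
Note that pad_sequences pre-pads (and pre-truncates) by default, which is why the zeros appear on the left; post-padding is available via the padding argument:

sequence.pad_sequences([[1, 2, 3]], maxlen=5)                  # array([[0, 0, 1, 2, 3]])
sequence.pad_sequences([[1, 2, 3]], maxlen=5, padding='post')  # array([[1, 2, 3, 0, 0]])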

Define X and Y

X_train = np.array(names_ints_pad).astype('float32')
y_train = np.array(labels)

X_test = np.array(sequence.pad_sequences(
    tokenizer.texts_to_sequences([n for (n,l) in test_set]),
    maxlen = max_len)).astype('float32')
y_test = np.array([l for (n,l) in test_set])

X_test_texts = [n for (n,l) in test_set]
X_train.shape
(6355, 15)
X_train[2,]
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 12.,  1.,  5.,  3.,  9.,
       18.,  1.], dtype=float32)

Model Definition

import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
## Plotting results
# def plot(history):

#     matplotlib.rcParams['figure.dpi'] = 100
#     acc = history.history['accuracy']
#     val_acc = history.history['val_accuracy']
#     loss = history.history['loss']
#     val_loss = history.history['val_loss']

#     epochs = range(1, len(acc)+1)
#     ## Accuracy plot
#     plt.plot(epochs, acc, 'bo', label='Training acc')
#     plt.plot(epochs, val_acc, 'b', label='Validation acc')
#     plt.title('Training and validation accuracy')
#     plt.legend()
#     ## Loss plot
#     plt.figure()

#     plt.plot(epochs, loss, 'bo', label='Training loss')
#     plt.plot(epochs, val_loss, 'b', label='Validation loss')
#     plt.title('Training and validation loss')
#     plt.legend()
#     plt.show()

    
def plot(history):
    pd.DataFrame(history.history).plot(figsize=(8,5))
    plt.grid(True)
    #plt.gca().set_ylim(0,1)
    plt.show()

Model 1

  • Two fully-connected (dense) layers

from keras import layers
model1 = keras.Sequential()
model1.add(keras.Input(shape=(max_len,)))
model1.add(layers.Dense(128, activation="relu", name="dense_layer_1"))
model1.add(layers.Dense(128, activation="relu", name="dense_layer_2"))
model1.add(layers.Dense(2, activation="softmax", name="output"))

model1.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
plot_model(model1, show_shapes=True)
../_images/dl-simple-case_33_0.png
history1 = model1.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                   validation_split = 0.2)
Epoch 1/50
40/40 - 3s - loss: 0.6426 - accuracy: 0.6764 - val_loss: 0.5912 - val_accuracy: 0.7183
Epoch 2/50
40/40 - 0s - loss: 0.5752 - accuracy: 0.7122 - val_loss: 0.5549 - val_accuracy: 0.7144
Epoch 3/50
40/40 - 0s - loss: 0.5504 - accuracy: 0.7254 - val_loss: 0.5492 - val_accuracy: 0.7231
Epoch 4/50
40/40 - 0s - loss: 0.5319 - accuracy: 0.7307 - val_loss: 0.5299 - val_accuracy: 0.7286
Epoch 5/50
40/40 - 0s - loss: 0.5258 - accuracy: 0.7349 - val_loss: 0.5234 - val_accuracy: 0.7270
Epoch 6/50
40/40 - 0s - loss: 0.5190 - accuracy: 0.7402 - val_loss: 0.5315 - val_accuracy: 0.7254
Epoch 7/50
40/40 - 0s - loss: 0.5092 - accuracy: 0.7469 - val_loss: 0.5171 - val_accuracy: 0.7309
Epoch 8/50
40/40 - 0s - loss: 0.4967 - accuracy: 0.7516 - val_loss: 0.5041 - val_accuracy: 0.7356
Epoch 9/50
40/40 - 0s - loss: 0.4848 - accuracy: 0.7598 - val_loss: 0.5238 - val_accuracy: 0.7356
Epoch 10/50
40/40 - 0s - loss: 0.4879 - accuracy: 0.7498 - val_loss: 0.5286 - val_accuracy: 0.7215
Epoch 11/50
40/40 - 0s - loss: 0.4826 - accuracy: 0.7604 - val_loss: 0.5182 - val_accuracy: 0.7530
Epoch 12/50
40/40 - 0s - loss: 0.4851 - accuracy: 0.7587 - val_loss: 0.5311 - val_accuracy: 0.7419
Epoch 13/50
40/40 - 0s - loss: 0.4721 - accuracy: 0.7638 - val_loss: 0.5045 - val_accuracy: 0.7404
Epoch 14/50
40/40 - 0s - loss: 0.4706 - accuracy: 0.7673 - val_loss: 0.5051 - val_accuracy: 0.7490
Epoch 15/50
40/40 - 0s - loss: 0.4634 - accuracy: 0.7683 - val_loss: 0.5141 - val_accuracy: 0.7419
Epoch 16/50
40/40 - 0s - loss: 0.4631 - accuracy: 0.7628 - val_loss: 0.4988 - val_accuracy: 0.7341
Epoch 17/50
40/40 - 0s - loss: 0.4592 - accuracy: 0.7644 - val_loss: 0.5055 - val_accuracy: 0.7498
Epoch 18/50
40/40 - 0s - loss: 0.4533 - accuracy: 0.7691 - val_loss: 0.5003 - val_accuracy: 0.7427
Epoch 19/50
40/40 - 0s - loss: 0.4558 - accuracy: 0.7677 - val_loss: 0.4885 - val_accuracy: 0.7506
Epoch 20/50
40/40 - 0s - loss: 0.4454 - accuracy: 0.7714 - val_loss: 0.5038 - val_accuracy: 0.7254
Epoch 21/50
40/40 - 0s - loss: 0.4455 - accuracy: 0.7793 - val_loss: 0.5126 - val_accuracy: 0.7506
Epoch 22/50
40/40 - 0s - loss: 0.4410 - accuracy: 0.7720 - val_loss: 0.4942 - val_accuracy: 0.7498
Epoch 23/50
40/40 - 0s - loss: 0.4478 - accuracy: 0.7726 - val_loss: 0.5109 - val_accuracy: 0.7498
Epoch 24/50
40/40 - 0s - loss: 0.4474 - accuracy: 0.7683 - val_loss: 0.4917 - val_accuracy: 0.7427
Epoch 25/50
40/40 - 0s - loss: 0.4415 - accuracy: 0.7760 - val_loss: 0.4905 - val_accuracy: 0.7514
Epoch 26/50
40/40 - 0s - loss: 0.4395 - accuracy: 0.7752 - val_loss: 0.5123 - val_accuracy: 0.7419
Epoch 27/50
40/40 - 0s - loss: 0.4414 - accuracy: 0.7756 - val_loss: 0.5014 - val_accuracy: 0.7624
Epoch 28/50
40/40 - 0s - loss: 0.4361 - accuracy: 0.7679 - val_loss: 0.5028 - val_accuracy: 0.7435
Epoch 29/50
40/40 - 0s - loss: 0.4404 - accuracy: 0.7720 - val_loss: 0.5032 - val_accuracy: 0.7404
Epoch 30/50
40/40 - 0s - loss: 0.4432 - accuracy: 0.7691 - val_loss: 0.4951 - val_accuracy: 0.7467
Epoch 31/50
40/40 - 0s - loss: 0.4315 - accuracy: 0.7750 - val_loss: 0.4973 - val_accuracy: 0.7474
Epoch 32/50
40/40 - 0s - loss: 0.4282 - accuracy: 0.7828 - val_loss: 0.5040 - val_accuracy: 0.7380
Epoch 33/50
40/40 - 0s - loss: 0.4219 - accuracy: 0.7832 - val_loss: 0.4863 - val_accuracy: 0.7545
Epoch 34/50
40/40 - 0s - loss: 0.4200 - accuracy: 0.7866 - val_loss: 0.4929 - val_accuracy: 0.7569
Epoch 35/50
40/40 - 0s - loss: 0.4241 - accuracy: 0.7809 - val_loss: 0.4988 - val_accuracy: 0.7530
Epoch 36/50
40/40 - 0s - loss: 0.4209 - accuracy: 0.7870 - val_loss: 0.5025 - val_accuracy: 0.7396
Epoch 37/50
40/40 - 0s - loss: 0.4185 - accuracy: 0.7823 - val_loss: 0.4848 - val_accuracy: 0.7561
Epoch 38/50
40/40 - 0s - loss: 0.4151 - accuracy: 0.7880 - val_loss: 0.4937 - val_accuracy: 0.7482
Epoch 39/50
40/40 - 0s - loss: 0.4158 - accuracy: 0.7815 - val_loss: 0.4950 - val_accuracy: 0.7522
Epoch 40/50
40/40 - 0s - loss: 0.4089 - accuracy: 0.7905 - val_loss: 0.4926 - val_accuracy: 0.7467
Epoch 41/50
40/40 - 0s - loss: 0.4124 - accuracy: 0.7884 - val_loss: 0.4960 - val_accuracy: 0.7482
Epoch 42/50
40/40 - 0s - loss: 0.4077 - accuracy: 0.7891 - val_loss: 0.4997 - val_accuracy: 0.7364
Epoch 43/50
40/40 - 0s - loss: 0.4030 - accuracy: 0.7937 - val_loss: 0.4910 - val_accuracy: 0.7490
Epoch 44/50
40/40 - 0s - loss: 0.4019 - accuracy: 0.7964 - val_loss: 0.4949 - val_accuracy: 0.7506
Epoch 45/50
40/40 - 0s - loss: 0.4008 - accuracy: 0.7968 - val_loss: 0.4866 - val_accuracy: 0.7459
Epoch 46/50
40/40 - 0s - loss: 0.4033 - accuracy: 0.7960 - val_loss: 0.5027 - val_accuracy: 0.7498
Epoch 47/50
40/40 - 0s - loss: 0.4018 - accuracy: 0.7956 - val_loss: 0.4876 - val_accuracy: 0.7545
Epoch 48/50
40/40 - 0s - loss: 0.4215 - accuracy: 0.7852 - val_loss: 0.4994 - val_accuracy: 0.7467
Epoch 49/50
40/40 - 0s - loss: 0.3989 - accuracy: 0.7962 - val_loss: 0.4932 - val_accuracy: 0.7427
Epoch 50/50
40/40 - 0s - loss: 0.3969 - accuracy: 0.7950 - val_loss: 0.4856 - val_accuracy: 0.7522
plot(history1)
../_images/dl-simple-case_35_0.png
model1.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.5277 - accuracy: 0.7558
[0.5276997089385986, 0.7558212876319885]
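
To put this test accuracy in perspective, we can compare it against a majority-class baseline (a quick sketch; its output is not part of the original run):

## majority-class baseline: always predict the more frequent class
majority_acc = max(np.mean(y_test), 1 - np.mean(y_test))
print('Majority baseline accuracy: %.4f' % majority_acc)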

Model 2

  • One Embedding Layer + Two fully-connected (dense) layers

EMBEDDING_DIM = 128
model2 = Sequential()
model2.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
## The GlobalAveragePooling1D layer returns a fixed-length output vector for each
## example by averaging over the sequence dimension. This allows the model to
## handle input of variable length, in the simplest way possible.
model2.add(layers.GlobalAveragePooling1D())
model2.add(layers.Dense(128, activation="relu", name="dense_layer_1"))
model2.add(layers.Dense(128, activation="relu", name="dense_layer_2"))
model2.add(layers.Dense(2, activation="softmax", name="output"))

model2.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
plot_model(model2, show_shapes=True)
../_images/dl-simple-case_39_0.png
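
Since mask_zero=True is set on the embedding layer, the pooling layer should average only over the real (non-padding) time steps. A quick standalone check (a sketch, assuming the TF 2.x masking behavior of GlobalAveragePooling1D):

## check that GlobalAveragePooling1D averages only over unmasked steps
demo_emb = layers.Embedding(input_dim=vocab_size, output_dim=4, mask_zero=True)
demo_in = np.array([[0, 0, 3, 5]])           # two padding steps + two real characters
demo_vecs = demo_emb(demo_in)                # shape (1, 4, 4)
demo_mask = demo_emb.compute_mask(demo_in)   # [[False, False, True, True]]
print(layers.GlobalAveragePooling1D()(demo_vecs, mask=demo_mask))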
history2 = model2.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                    validation_split = 0.2)
Epoch 1/50
40/40 - 1s - loss: 0.6327 - accuracy: 0.6434 - val_loss: 0.5773 - val_accuracy: 0.7113
Epoch 2/50
40/40 - 0s - loss: 0.5739 - accuracy: 0.7036 - val_loss: 0.5572 - val_accuracy: 0.7144
Epoch 3/50
40/40 - 0s - loss: 0.5587 - accuracy: 0.7177 - val_loss: 0.5562 - val_accuracy: 0.7215
Epoch 4/50
40/40 - 0s - loss: 0.5527 - accuracy: 0.7268 - val_loss: 0.5538 - val_accuracy: 0.7270
Epoch 5/50
40/40 - 0s - loss: 0.5463 - accuracy: 0.7321 - val_loss: 0.5729 - val_accuracy: 0.7081
Epoch 6/50
40/40 - 0s - loss: 0.5482 - accuracy: 0.7276 - val_loss: 0.5584 - val_accuracy: 0.7183
Epoch 7/50
40/40 - 0s - loss: 0.5380 - accuracy: 0.7333 - val_loss: 0.5472 - val_accuracy: 0.7317
Epoch 8/50
40/40 - 0s - loss: 0.5326 - accuracy: 0.7384 - val_loss: 0.5491 - val_accuracy: 0.7341
Epoch 9/50
40/40 - 0s - loss: 0.5272 - accuracy: 0.7408 - val_loss: 0.5485 - val_accuracy: 0.7262
Epoch 10/50
40/40 - 0s - loss: 0.5221 - accuracy: 0.7463 - val_loss: 0.5577 - val_accuracy: 0.7223
Epoch 11/50
40/40 - 0s - loss: 0.5162 - accuracy: 0.7498 - val_loss: 0.5463 - val_accuracy: 0.7293
Epoch 12/50
40/40 - 0s - loss: 0.5122 - accuracy: 0.7555 - val_loss: 0.5465 - val_accuracy: 0.7309
Epoch 13/50
40/40 - 0s - loss: 0.5065 - accuracy: 0.7543 - val_loss: 0.5418 - val_accuracy: 0.7317
Epoch 14/50
40/40 - 0s - loss: 0.5057 - accuracy: 0.7581 - val_loss: 0.5531 - val_accuracy: 0.7270
Epoch 15/50
40/40 - 0s - loss: 0.5006 - accuracy: 0.7657 - val_loss: 0.5534 - val_accuracy: 0.7309
Epoch 16/50
40/40 - 0s - loss: 0.4945 - accuracy: 0.7689 - val_loss: 0.5460 - val_accuracy: 0.7341
Epoch 17/50
40/40 - 0s - loss: 0.4884 - accuracy: 0.7718 - val_loss: 0.5480 - val_accuracy: 0.7388
Epoch 18/50
40/40 - 0s - loss: 0.4863 - accuracy: 0.7758 - val_loss: 0.5493 - val_accuracy: 0.7278
Epoch 19/50
40/40 - 0s - loss: 0.4812 - accuracy: 0.7728 - val_loss: 0.5502 - val_accuracy: 0.7270
Epoch 20/50
40/40 - 0s - loss: 0.4752 - accuracy: 0.7825 - val_loss: 0.5525 - val_accuracy: 0.7254
Epoch 21/50
40/40 - 0s - loss: 0.4722 - accuracy: 0.7799 - val_loss: 0.5516 - val_accuracy: 0.7325
Epoch 22/50
40/40 - 0s - loss: 0.4708 - accuracy: 0.7813 - val_loss: 0.5538 - val_accuracy: 0.7309
Epoch 23/50
40/40 - 0s - loss: 0.4636 - accuracy: 0.7840 - val_loss: 0.5533 - val_accuracy: 0.7396
Epoch 24/50
40/40 - 0s - loss: 0.4584 - accuracy: 0.7909 - val_loss: 0.5741 - val_accuracy: 0.7136
Epoch 25/50
40/40 - 0s - loss: 0.4606 - accuracy: 0.7878 - val_loss: 0.5630 - val_accuracy: 0.7215
Epoch 26/50
40/40 - 0s - loss: 0.4580 - accuracy: 0.7821 - val_loss: 0.5575 - val_accuracy: 0.7254
Epoch 27/50
40/40 - 0s - loss: 0.4490 - accuracy: 0.7923 - val_loss: 0.5561 - val_accuracy: 0.7317
Epoch 28/50
40/40 - 0s - loss: 0.4479 - accuracy: 0.7917 - val_loss: 0.5601 - val_accuracy: 0.7341
Epoch 29/50
40/40 - 0s - loss: 0.4453 - accuracy: 0.7927 - val_loss: 0.5580 - val_accuracy: 0.7317
Epoch 30/50
40/40 - 0s - loss: 0.4423 - accuracy: 0.7939 - val_loss: 0.5719 - val_accuracy: 0.7325
Epoch 31/50
40/40 - 0s - loss: 0.4362 - accuracy: 0.7941 - val_loss: 0.5709 - val_accuracy: 0.7278
Epoch 32/50
40/40 - 0s - loss: 0.4361 - accuracy: 0.8004 - val_loss: 0.5579 - val_accuracy: 0.7356
Epoch 33/50
40/40 - 0s - loss: 0.4327 - accuracy: 0.7986 - val_loss: 0.5628 - val_accuracy: 0.7207
Epoch 34/50
40/40 - 0s - loss: 0.4301 - accuracy: 0.8029 - val_loss: 0.5740 - val_accuracy: 0.7175
Epoch 35/50
40/40 - 0s - loss: 0.4278 - accuracy: 0.8066 - val_loss: 0.5702 - val_accuracy: 0.7128
Epoch 36/50
40/40 - 0s - loss: 0.4248 - accuracy: 0.8078 - val_loss: 0.5696 - val_accuracy: 0.7325
Epoch 37/50
40/40 - 0s - loss: 0.4167 - accuracy: 0.8125 - val_loss: 0.5786 - val_accuracy: 0.7278
Epoch 38/50
40/40 - 0s - loss: 0.4175 - accuracy: 0.8057 - val_loss: 0.5764 - val_accuracy: 0.7286
Epoch 39/50
40/40 - 0s - loss: 0.4112 - accuracy: 0.8116 - val_loss: 0.5874 - val_accuracy: 0.7105
Epoch 40/50
40/40 - 0s - loss: 0.4063 - accuracy: 0.8141 - val_loss: 0.5694 - val_accuracy: 0.7246
Epoch 41/50
40/40 - 0s - loss: 0.4100 - accuracy: 0.8163 - val_loss: 0.5826 - val_accuracy: 0.7254
Epoch 42/50
40/40 - 0s - loss: 0.4011 - accuracy: 0.8163 - val_loss: 0.5745 - val_accuracy: 0.7325
Epoch 43/50
40/40 - 0s - loss: 0.3966 - accuracy: 0.8183 - val_loss: 0.5769 - val_accuracy: 0.7238
Epoch 44/50
40/40 - 0s - loss: 0.3985 - accuracy: 0.8157 - val_loss: 0.5891 - val_accuracy: 0.7089
Epoch 45/50
40/40 - 0s - loss: 0.3956 - accuracy: 0.8214 - val_loss: 0.5919 - val_accuracy: 0.7254
Epoch 46/50
40/40 - 0s - loss: 0.3887 - accuracy: 0.8238 - val_loss: 0.5976 - val_accuracy: 0.7333
Epoch 47/50
40/40 - 0s - loss: 0.3853 - accuracy: 0.8267 - val_loss: 0.5894 - val_accuracy: 0.7238
Epoch 48/50
40/40 - 0s - loss: 0.3793 - accuracy: 0.8291 - val_loss: 0.6013 - val_accuracy: 0.7191
Epoch 49/50
40/40 - 0s - loss: 0.3794 - accuracy: 0.8263 - val_loss: 0.6029 - val_accuracy: 0.7231
Epoch 50/50
40/40 - 0s - loss: 0.3743 - accuracy: 0.8332 - val_loss: 0.6209 - val_accuracy: 0.7152
plot(history2)
../_images/dl-simple-case_41_0.png
model2.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.5622 - accuracy: 0.7495
[0.5622150301933289, 0.7495279908180237]

Check Embeddings

  • Compared to one-hot encodings of characters, embeddings may capture more information about the characteristics of the characters.

  • We can extract the embedding layer and apply dimensionality reduction techniques (e.g., t-SNE) to see how the embeddings capture the relationships between characters.

ind2char = tokenizer.index_word
[ind2char.get(i) for i in X_test[10]]
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 'j',
 'e',
 'r',
 'r',
 'o',
 'l',
 'd']
char_vectors = model2.layers[0].get_weights()[0]
char_vectors.shape
(30, 128)
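
As a quick sanity check (a sketch), we can compute the cosine similarity between the embeddings of two characters, e.g. the vowels 'a' and 'e', before looking at the full t-SNE map:

def cos_sim(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

cos_sim(char_vectors[tokenizer.word_index['a']],
        char_vectors[tokenizer.word_index['e']])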
labels = [char for (ind, char) in tokenizer.index_word.items()]
labels.insert(0, None)   # index 0 is the padding index and has no character
labels
[None,
 'a',
 'e',
 'i',
 'n',
 'r',
 'l',
 'o',
 't',
 's',
 'd',
 'y',
 'm',
 'h',
 'c',
 'b',
 'u',
 'g',
 'k',
 'j',
 'v',
 'f',
 'p',
 'w',
 'z',
 'x',
 'q',
 '-',
 ' ',
 "'"]
from sklearn.manifold import TSNE

tsne = TSNE(n_components=2, random_state=0, n_iter=5000, perplexity=2)
np.set_printoptions(suppress=True)
T = tsne.fit_transform(char_vectors)

plt.figure(figsize=(10, 7), dpi=150)
plt.scatter(T[:, 0], T[:, 1], c='orange', edgecolors='r')
for label, x, y in zip(labels, T[:, 0], T[:, 1]):
    plt.annotate(label, xy=(x+1, y+1), xytext=(0, 0), textcoords='offset points')
../_images/dl-simple-case_48_0.png

Issues with Word/Character Representations

  • One-hot encodings do not capture semantic relationships between characters.

  • For deep learning NLP, it is therefore preferred to replace one-hot encodings of words/characters with embeddings, which are argued to carry more semantic information about the tokens (see the sketch after this list).

  • The question then is how to train and create better word embeddings. We will come back to this issue later.
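
To make the contrast concrete, here is a minimal sketch comparing the two representations for the single character 'a', reusing the char_vectors matrix extracted above:

## one-hot: a sparse 30-dim vector with a single 1; all characters are equidistant
onehot_a = to_categorical(tokenizer.word_index['a'], num_classes=vocab_size)
## embedding: a dense 128-dim vector learned jointly with the classification task
emb_a = char_vectors[tokenizer.word_index['a']]
print(onehot_a.shape, emb_a.shape)   # (30,) (128,)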

Hyperparameter Tuning

  • Like feature-based ML methods, neural networks also come with many hyperparameters, which need to be set before training; the default values are not necessarily optimal.

  • Typical hyperparameters include:

    • Number of units (nodes) per layer

    • Learning Rates

  • We can utilize the kerastuner module to fine-tune the hyperparameters.

  • Steps for Keras Tuner

    • First, wrap the model definition in a function, which takes a single hp argument.

    • Inside this function, replace any value we want to tune with a call to hyperparameter sampling methods, e.g. hp.Int() or hp.Choice(). The function should return a compiled model.

    • Next, instantiate a tuner object, specifying the optimization objective and other search parameters.

    • Finally, start the search with the search() method, which takes the same arguments as Model.fit() in keras.

    • When the search is over, we can retrieve the best models and a summary of the results from the tuner.

import kerastuner
## Wrap model definition in a function
## and specify the parameters needed for tuning
def build_model(hp):
    model1 = keras.Sequential()
    model1.add(keras.Input(shape=(max_len,)))
    ## Note: both Dense layers refer to the same hyperparameter name ('units'),
    ## so they share one sampled value in each trial.
    model1.add(layers.Dense(hp.Int('units', min_value=32, max_value=128, step=32), activation="relu", name="dense_layer_1"))
    model1.add(layers.Dense(hp.Int('units', min_value=32, max_value=128, step=32), activation="relu", name="dense_layer_2"))
    model1.add(layers.Dense(2, activation="softmax", name="output"))
    model1.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate',
                      values=[1e-2, 1e-3, 1e-4])),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return model1

# def build_model(hp):
#     inputs = keras.Input(shape=(784,))
#     x = layers.Dense(
#         units=hp.Int('units', min_value=32, max_value=512, step=32),
#         activation='relu')(inputs)
#     outputs = layers.Dense(10, activation='softmax')(x)
#     model = keras.Model(inputs, outputs)
#     model.compile(
#         optimizer=keras.optimizers.Adam(
#             hp.Choice('learning_rate',
#                       values=[1e-2, 1e-3, 1e-4])),
#         loss='sparse_categorical_crossentropy',
#         metrics=['accuracy'])
#     return model
## This is to clean up the temp dir from the tuner
## Every time we re-start the tuner, it's better to keep the temp dir clean

import os
import shutil

if os.path.isdir('my_dir'):
    shutil.rmtree('my_dir')
    
## Instantiate the tuner

tuner = kerastuner.tuners.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir')
## Check the tuner's search space
tuner.search_space_summary()
Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
## Start tuning with the tuner
## (search() accepts the same arguments as Model.fit(); epochs defaults to 1 here)
tuner.search(X_train, y_train, validation_split=0.2, batch_size=128)
Trial 9 Complete [00h 00m 03s]
val_accuracy: 0.7191188136736552

Best val_accuracy So Far: 0.7191188136736552
Total elapsed time: 00h 00m 31s
INFO:tensorflow:Oracle triggered exit
## Retrieve the best models from the tuner
models = tuner.get_best_models(num_models=2)
## Retrieve the summary of results from the tuner
tuner.results_summary()
Results summary
Results in my_dir/untitled_project
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 128
learning_rate: 0.001
Score: 0.7133490641911825
Trial summary
Hyperparameters:
units: 96
learning_rate: 0.01
Score: 0.7125623027483622
Trial summary
Hyperparameters:
units: 96
learning_rate: 0.001
Score: 0.7004982829093933
Trial summary
Hyperparameters:
units: 64
learning_rate: 0.001
Score: 0.6949908137321472
Trial summary
Hyperparameters:
units: 128
learning_rate: 0.0001
Score: 0.6197220087051392
Trial summary
Hyperparameters:
units: 64
learning_rate: 0.0001
Score: 0.5727773408095042
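
Besides the refitted models, the tuner can also return the winning hyperparameter values directly via get_best_hyperparameters():

best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hp.get('units'), best_hp.get('learning_rate'))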

Sequence Models

Model 3

  • One Embedding Layer + LSTM + Dense Layer

EMBEDDING_DIM = 128
model3 = Sequential()
model3.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
#model3.add(SpatialDropout1D(0.2))
model3.add(LSTM(64))  # optionally: dropout=0.2, recurrent_dropout=0.2
model3.add(Dense(2, activation="softmax"))

model3.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
plot_model(model3, show_shapes=True)
../_images/dl-simple-case_65_0.png
history3 = model3.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                   validation_split = 0.2)
Epoch 1/50
40/40 - 6s - loss: 0.6243 - accuracy: 0.6412 - val_loss: 0.5494 - val_accuracy: 0.7293
Epoch 2/50
40/40 - 1s - loss: 0.5036 - accuracy: 0.7606 - val_loss: 0.4463 - val_accuracy: 0.7836
Epoch 3/50
40/40 - 1s - loss: 0.4457 - accuracy: 0.7801 - val_loss: 0.4239 - val_accuracy: 0.8041
Epoch 4/50
40/40 - 1s - loss: 0.4347 - accuracy: 0.7907 - val_loss: 0.4203 - val_accuracy: 0.7946
Epoch 5/50
40/40 - 1s - loss: 0.4294 - accuracy: 0.7939 - val_loss: 0.4139 - val_accuracy: 0.8080
Epoch 6/50
40/40 - 1s - loss: 0.4259 - accuracy: 0.7966 - val_loss: 0.4108 - val_accuracy: 0.8127
Epoch 7/50
40/40 - 1s - loss: 0.4222 - accuracy: 0.8004 - val_loss: 0.4045 - val_accuracy: 0.8135
Epoch 8/50
40/40 - 1s - loss: 0.4190 - accuracy: 0.8025 - val_loss: 0.4051 - val_accuracy: 0.8175
Epoch 9/50
40/40 - 1s - loss: 0.4149 - accuracy: 0.8004 - val_loss: 0.4004 - val_accuracy: 0.8214
Epoch 10/50
40/40 - 1s - loss: 0.4115 - accuracy: 0.8017 - val_loss: 0.3998 - val_accuracy: 0.8143
Epoch 11/50
40/40 - 1s - loss: 0.4075 - accuracy: 0.8009 - val_loss: 0.3972 - val_accuracy: 0.8175
Epoch 12/50
40/40 - 1s - loss: 0.4043 - accuracy: 0.8102 - val_loss: 0.3927 - val_accuracy: 0.8206
Epoch 13/50
40/40 - 1s - loss: 0.4007 - accuracy: 0.8068 - val_loss: 0.3951 - val_accuracy: 0.8183
Epoch 14/50
40/40 - 1s - loss: 0.3969 - accuracy: 0.8078 - val_loss: 0.3899 - val_accuracy: 0.8175
Epoch 15/50
40/40 - 1s - loss: 0.3942 - accuracy: 0.8104 - val_loss: 0.3883 - val_accuracy: 0.8222
Epoch 16/50
40/40 - 1s - loss: 0.3885 - accuracy: 0.8129 - val_loss: 0.3877 - val_accuracy: 0.8230
Epoch 17/50
40/40 - 1s - loss: 0.3855 - accuracy: 0.8127 - val_loss: 0.3896 - val_accuracy: 0.8206
Epoch 18/50
40/40 - 1s - loss: 0.3817 - accuracy: 0.8133 - val_loss: 0.3869 - val_accuracy: 0.8222
Epoch 19/50
40/40 - 1s - loss: 0.3798 - accuracy: 0.8161 - val_loss: 0.3863 - val_accuracy: 0.8167
Epoch 20/50
40/40 - 1s - loss: 0.3759 - accuracy: 0.8242 - val_loss: 0.3802 - val_accuracy: 0.8245
Epoch 21/50
40/40 - 1s - loss: 0.3693 - accuracy: 0.8277 - val_loss: 0.3828 - val_accuracy: 0.8293
Epoch 22/50
40/40 - 1s - loss: 0.3669 - accuracy: 0.8253 - val_loss: 0.3824 - val_accuracy: 0.8198
Epoch 23/50
40/40 - 1s - loss: 0.3629 - accuracy: 0.8299 - val_loss: 0.3823 - val_accuracy: 0.8190
Epoch 24/50
40/40 - 1s - loss: 0.3595 - accuracy: 0.8301 - val_loss: 0.3820 - val_accuracy: 0.8332
Epoch 25/50
40/40 - 1s - loss: 0.3545 - accuracy: 0.8322 - val_loss: 0.3859 - val_accuracy: 0.8277
Epoch 26/50
40/40 - 1s - loss: 0.3492 - accuracy: 0.8287 - val_loss: 0.3833 - val_accuracy: 0.8261
Epoch 27/50
40/40 - 1s - loss: 0.3449 - accuracy: 0.8326 - val_loss: 0.3874 - val_accuracy: 0.8238
Epoch 28/50
40/40 - 1s - loss: 0.3404 - accuracy: 0.8413 - val_loss: 0.3796 - val_accuracy: 0.8277
Epoch 29/50
40/40 - 1s - loss: 0.3344 - accuracy: 0.8393 - val_loss: 0.3824 - val_accuracy: 0.8261
Epoch 30/50
40/40 - 1s - loss: 0.3281 - accuracy: 0.8448 - val_loss: 0.3902 - val_accuracy: 0.8269
Epoch 31/50
40/40 - 1s - loss: 0.3262 - accuracy: 0.8442 - val_loss: 0.3846 - val_accuracy: 0.8253
Epoch 32/50
40/40 - 1s - loss: 0.3207 - accuracy: 0.8485 - val_loss: 0.3854 - val_accuracy: 0.8285
Epoch 33/50
40/40 - 1s - loss: 0.3172 - accuracy: 0.8505 - val_loss: 0.3898 - val_accuracy: 0.8214
Epoch 34/50
40/40 - 1s - loss: 0.3119 - accuracy: 0.8558 - val_loss: 0.3863 - val_accuracy: 0.8261
Epoch 35/50
40/40 - 1s - loss: 0.3056 - accuracy: 0.8552 - val_loss: 0.3890 - val_accuracy: 0.8183
Epoch 36/50
40/40 - 1s - loss: 0.3006 - accuracy: 0.8603 - val_loss: 0.3949 - val_accuracy: 0.8285
Epoch 37/50
40/40 - 1s - loss: 0.2954 - accuracy: 0.8623 - val_loss: 0.3987 - val_accuracy: 0.8198
Epoch 38/50
40/40 - 1s - loss: 0.2913 - accuracy: 0.8672 - val_loss: 0.3932 - val_accuracy: 0.8190
Epoch 39/50
40/40 - 1s - loss: 0.2832 - accuracy: 0.8698 - val_loss: 0.4070 - val_accuracy: 0.8285
Epoch 40/50
40/40 - 1s - loss: 0.2808 - accuracy: 0.8680 - val_loss: 0.3966 - val_accuracy: 0.8285
Epoch 41/50
40/40 - 1s - loss: 0.2732 - accuracy: 0.8723 - val_loss: 0.4122 - val_accuracy: 0.8245
Epoch 42/50
40/40 - 1s - loss: 0.2672 - accuracy: 0.8765 - val_loss: 0.4087 - val_accuracy: 0.8159
Epoch 43/50
40/40 - 1s - loss: 0.2648 - accuracy: 0.8824 - val_loss: 0.4100 - val_accuracy: 0.8151
Epoch 44/50
40/40 - 1s - loss: 0.2579 - accuracy: 0.8836 - val_loss: 0.4197 - val_accuracy: 0.8238
Epoch 45/50
40/40 - 1s - loss: 0.2508 - accuracy: 0.8875 - val_loss: 0.4185 - val_accuracy: 0.8183
Epoch 46/50
40/40 - 1s - loss: 0.2482 - accuracy: 0.8859 - val_loss: 0.4231 - val_accuracy: 0.8120
Epoch 47/50
40/40 - 1s - loss: 0.2428 - accuracy: 0.8914 - val_loss: 0.4277 - val_accuracy: 0.8167
Epoch 48/50
40/40 - 1s - loss: 0.2387 - accuracy: 0.8959 - val_loss: 0.4229 - val_accuracy: 0.8127
Epoch 49/50
40/40 - 1s - loss: 0.2327 - accuracy: 0.8936 - val_loss: 0.4281 - val_accuracy: 0.8206
Epoch 50/50
40/40 - 1s - loss: 0.2290 - accuracy: 0.8971 - val_loss: 0.4390 - val_accuracy: 0.8159
plot(history3)
../_images/dl-simple-case_67_0.png

Model 4

  • One Embedding Layer + Two Stacked LSTM Layers + Dense Layer

EMBEDDING_DIM = 128
model4 = Sequential()
model4.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
#model4.add(SpatialDropout1D(0.2))
model4.add(LSTM(64, return_sequences=True))  # optionally: dropout=0.2, recurrent_dropout=0.2
model4.add(LSTM(64))
model4.add(Dense(2, activation="softmax"))

model4.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
plot_model(model4, show_shapes=True)
../_images/dl-simple-case_70_0.png
history4 = model4.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                   validation_split = 0.2)
Epoch 1/50
40/40 - 10s - loss: 0.6456 - accuracy: 0.6275 - val_loss: 0.6029 - val_accuracy: 0.6672
Epoch 2/50
40/40 - 1s - loss: 0.5884 - accuracy: 0.6841 - val_loss: 0.5460 - val_accuracy: 0.7364
Epoch 3/50
40/40 - 1s - loss: 0.4911 - accuracy: 0.7620 - val_loss: 0.4315 - val_accuracy: 0.7939
Epoch 4/50
40/40 - 1s - loss: 0.4400 - accuracy: 0.7864 - val_loss: 0.4203 - val_accuracy: 0.7939
Epoch 5/50
40/40 - 1s - loss: 0.4337 - accuracy: 0.7899 - val_loss: 0.4144 - val_accuracy: 0.7994
Epoch 6/50
40/40 - 1s - loss: 0.4296 - accuracy: 0.7952 - val_loss: 0.4101 - val_accuracy: 0.8072
Epoch 7/50
40/40 - 1s - loss: 0.4233 - accuracy: 0.7972 - val_loss: 0.4046 - val_accuracy: 0.8088
Epoch 8/50
40/40 - 1s - loss: 0.4195 - accuracy: 0.7962 - val_loss: 0.4021 - val_accuracy: 0.8175
Epoch 9/50
40/40 - 1s - loss: 0.4145 - accuracy: 0.7970 - val_loss: 0.3953 - val_accuracy: 0.8222
Epoch 10/50
40/40 - 1s - loss: 0.4095 - accuracy: 0.8055 - val_loss: 0.3970 - val_accuracy: 0.8175
Epoch 11/50
40/40 - 1s - loss: 0.4065 - accuracy: 0.8013 - val_loss: 0.3947 - val_accuracy: 0.8167
Epoch 12/50
40/40 - 1s - loss: 0.4009 - accuracy: 0.8025 - val_loss: 0.3954 - val_accuracy: 0.8230
Epoch 13/50
40/40 - 1s - loss: 0.3941 - accuracy: 0.8053 - val_loss: 0.3945 - val_accuracy: 0.8167
Epoch 14/50
40/40 - 1s - loss: 0.3936 - accuracy: 0.8053 - val_loss: 0.3856 - val_accuracy: 0.8238
Epoch 15/50
40/40 - 1s - loss: 0.3871 - accuracy: 0.8108 - val_loss: 0.3823 - val_accuracy: 0.8222
Epoch 16/50
40/40 - 1s - loss: 0.3831 - accuracy: 0.8108 - val_loss: 0.3860 - val_accuracy: 0.8253
Epoch 17/50
40/40 - 1s - loss: 0.3797 - accuracy: 0.8167 - val_loss: 0.3837 - val_accuracy: 0.8183
Epoch 18/50
40/40 - 1s - loss: 0.3778 - accuracy: 0.8210 - val_loss: 0.3851 - val_accuracy: 0.8183
Epoch 19/50
40/40 - 1s - loss: 0.3736 - accuracy: 0.8216 - val_loss: 0.4027 - val_accuracy: 0.8088
Epoch 20/50
40/40 - 1s - loss: 0.3683 - accuracy: 0.8279 - val_loss: 0.3774 - val_accuracy: 0.8277
Epoch 21/50
40/40 - 1s - loss: 0.3613 - accuracy: 0.8297 - val_loss: 0.3832 - val_accuracy: 0.8206
Epoch 22/50
40/40 - 1s - loss: 0.3605 - accuracy: 0.8326 - val_loss: 0.3839 - val_accuracy: 0.8214
Epoch 23/50
40/40 - 1s - loss: 0.3549 - accuracy: 0.8277 - val_loss: 0.3842 - val_accuracy: 0.8214
Epoch 24/50
40/40 - 1s - loss: 0.3508 - accuracy: 0.8316 - val_loss: 0.3832 - val_accuracy: 0.8269
Epoch 25/50
40/40 - 1s - loss: 0.3469 - accuracy: 0.8369 - val_loss: 0.3843 - val_accuracy: 0.8269
Epoch 26/50
40/40 - 1s - loss: 0.3408 - accuracy: 0.8381 - val_loss: 0.3836 - val_accuracy: 0.8253
Epoch 27/50
40/40 - 1s - loss: 0.3359 - accuracy: 0.8403 - val_loss: 0.3832 - val_accuracy: 0.8214
Epoch 28/50
40/40 - 1s - loss: 0.3291 - accuracy: 0.8458 - val_loss: 0.3893 - val_accuracy: 0.8261
Epoch 29/50
40/40 - 1s - loss: 0.3256 - accuracy: 0.8478 - val_loss: 0.3854 - val_accuracy: 0.8277
Epoch 30/50
40/40 - 2s - loss: 0.3222 - accuracy: 0.8501 - val_loss: 0.3886 - val_accuracy: 0.8277
Epoch 31/50
40/40 - 1s - loss: 0.3126 - accuracy: 0.8533 - val_loss: 0.4062 - val_accuracy: 0.8175
Epoch 32/50
40/40 - 1s - loss: 0.3093 - accuracy: 0.8527 - val_loss: 0.3949 - val_accuracy: 0.8222
Epoch 33/50
40/40 - 1s - loss: 0.3002 - accuracy: 0.8582 - val_loss: 0.3987 - val_accuracy: 0.8190
Epoch 34/50
40/40 - 1s - loss: 0.2936 - accuracy: 0.8588 - val_loss: 0.3972 - val_accuracy: 0.8143
Epoch 35/50
40/40 - 1s - loss: 0.2890 - accuracy: 0.8643 - val_loss: 0.4175 - val_accuracy: 0.8222
Epoch 36/50
40/40 - 1s - loss: 0.2783 - accuracy: 0.8698 - val_loss: 0.4109 - val_accuracy: 0.8159
Epoch 37/50
40/40 - 1s - loss: 0.2722 - accuracy: 0.8753 - val_loss: 0.4118 - val_accuracy: 0.8222
Epoch 38/50
40/40 - 1s - loss: 0.2625 - accuracy: 0.8763 - val_loss: 0.4205 - val_accuracy: 0.8238
Epoch 39/50
40/40 - 1s - loss: 0.2576 - accuracy: 0.8824 - val_loss: 0.4374 - val_accuracy: 0.8206
Epoch 40/50
40/40 - 1s - loss: 0.2497 - accuracy: 0.8859 - val_loss: 0.4324 - val_accuracy: 0.8183
Epoch 41/50
40/40 - 1s - loss: 0.2417 - accuracy: 0.8875 - val_loss: 0.4376 - val_accuracy: 0.8183
Epoch 42/50
40/40 - 1s - loss: 0.2395 - accuracy: 0.8853 - val_loss: 0.4498 - val_accuracy: 0.8096
Epoch 43/50
40/40 - 1s - loss: 0.2303 - accuracy: 0.8946 - val_loss: 0.4628 - val_accuracy: 0.8112
Epoch 44/50
40/40 - 1s - loss: 0.2228 - accuracy: 0.8979 - val_loss: 0.4532 - val_accuracy: 0.8167
Epoch 45/50
40/40 - 1s - loss: 0.2212 - accuracy: 0.8967 - val_loss: 0.4700 - val_accuracy: 0.8127
Epoch 46/50
40/40 - 1s - loss: 0.2084 - accuracy: 0.9017 - val_loss: 0.4759 - val_accuracy: 0.8214
Epoch 47/50
40/40 - 1s - loss: 0.2057 - accuracy: 0.9076 - val_loss: 0.4780 - val_accuracy: 0.8080
Epoch 48/50
40/40 - 1s - loss: 0.2002 - accuracy: 0.9068 - val_loss: 0.5026 - val_accuracy: 0.8135
Epoch 49/50
40/40 - 1s - loss: 0.1918 - accuracy: 0.9138 - val_loss: 0.4944 - val_accuracy: 0.8049
Epoch 50/50
40/40 - 1s - loss: 0.1876 - accuracy: 0.9129 - val_loss: 0.5131 - val_accuracy: 0.8057
plot(history4)
../_images/dl-simple-case_72_0.png
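
Unlike Models 1 and 2, Models 3 and 4 are not evaluated on the held-out test set above; the same evaluate() call applies for a direct comparison (outputs omitted here):

model3.evaluate(X_test, y_test, batch_size=128, verbose=2)
model4.evaluate(X_test, y_test, batch_size=128, verbose=2)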

Model 5

  • One Embedding Layer + LSTM [concatenating the hidden state and the cell state of the last time step] + Dense Layer

EMBEDDING_DIM = 128

inputs = keras.Input(shape=(max_len,))
x = layers.Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True)(inputs)
#x = layers.SpatialDropout1D(0.2)(x)
x_out, x_last_h, x_c = layers.LSTM(64, dropout=0.2,
                                   recurrent_dropout=0.2,
                                   return_sequences=False, return_state=True)(x)
## LSTM Parameters:
#     `return_sequences=True`: return the hidden states of every time step
#     `return_state=True`: additionally return the hidden and cell states of the last time step
#     When both are set True, the return values of the LSTM are:
#     (1) the hidden states of all time steps (or, with `return_sequences=False`,
#         just the hidden state of the last time step)
#     (2) the hidden state of the last time step
#     (3) the cell state of the last time step
#     Here `return_sequences=False`, so `x_out` is identical to `x_last_h`.

x = layers.Concatenate(axis=1)([x_last_h, x_c])
outputs = layers.Dense(2, activation='softmax')(x)
model5 = keras.Model(inputs=inputs, outputs=outputs, name="name_gender_model")

plot_model(model5, show_shapes=True)
../_images/dl-simple-case_74_0.png
model5.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
history5 = model5.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                   validation_split = 0.2)
Epoch 1/50
40/40 - 4s - loss: 0.6023 - accuracy: 0.6621 - val_loss: 0.5197 - val_accuracy: 0.7537
Epoch 2/50
40/40 - 1s - loss: 0.4781 - accuracy: 0.7606 - val_loss: 0.4376 - val_accuracy: 0.7891
Epoch 3/50
40/40 - 1s - loss: 0.4450 - accuracy: 0.7840 - val_loss: 0.4225 - val_accuracy: 0.8096
Epoch 4/50
40/40 - 1s - loss: 0.4348 - accuracy: 0.7925 - val_loss: 0.4229 - val_accuracy: 0.8096
Epoch 5/50
40/40 - 1s - loss: 0.4324 - accuracy: 0.7960 - val_loss: 0.4190 - val_accuracy: 0.8104
Epoch 6/50
40/40 - 1s - loss: 0.4290 - accuracy: 0.7941 - val_loss: 0.4148 - val_accuracy: 0.8104
Epoch 7/50
40/40 - 1s - loss: 0.4275 - accuracy: 0.7996 - val_loss: 0.4135 - val_accuracy: 0.8198
Epoch 8/50
40/40 - 1s - loss: 0.4258 - accuracy: 0.7980 - val_loss: 0.4117 - val_accuracy: 0.8135
Epoch 9/50
40/40 - 1s - loss: 0.4204 - accuracy: 0.8029 - val_loss: 0.4058 - val_accuracy: 0.8198
Epoch 10/50
40/40 - 1s - loss: 0.4201 - accuracy: 0.7996 - val_loss: 0.4035 - val_accuracy: 0.8096
Epoch 11/50
40/40 - 1s - loss: 0.4137 - accuracy: 0.8051 - val_loss: 0.4024 - val_accuracy: 0.8135
Epoch 12/50
40/40 - 1s - loss: 0.4124 - accuracy: 0.8009 - val_loss: 0.4012 - val_accuracy: 0.8175
Epoch 13/50
40/40 - 1s - loss: 0.4110 - accuracy: 0.7994 - val_loss: 0.3979 - val_accuracy: 0.8198
Epoch 14/50
40/40 - 1s - loss: 0.4079 - accuracy: 0.8051 - val_loss: 0.3955 - val_accuracy: 0.8230
Epoch 15/50
40/40 - 1s - loss: 0.4050 - accuracy: 0.8080 - val_loss: 0.3979 - val_accuracy: 0.8198
Epoch 16/50
40/40 - 1s - loss: 0.4054 - accuracy: 0.8070 - val_loss: 0.3948 - val_accuracy: 0.8190
Epoch 17/50
40/40 - 1s - loss: 0.4007 - accuracy: 0.8088 - val_loss: 0.3943 - val_accuracy: 0.8238
Epoch 18/50
40/40 - 1s - loss: 0.3969 - accuracy: 0.8072 - val_loss: 0.3910 - val_accuracy: 0.8198
Epoch 19/50
40/40 - 1s - loss: 0.3956 - accuracy: 0.8104 - val_loss: 0.3896 - val_accuracy: 0.8120
Epoch 20/50
40/40 - 1s - loss: 0.3925 - accuracy: 0.8096 - val_loss: 0.3885 - val_accuracy: 0.8167
Epoch 21/50
40/40 - 1s - loss: 0.3923 - accuracy: 0.8147 - val_loss: 0.3890 - val_accuracy: 0.8127
Epoch 22/50
40/40 - 1s - loss: 0.3877 - accuracy: 0.8175 - val_loss: 0.3862 - val_accuracy: 0.8206
Epoch 23/50
40/40 - 1s - loss: 0.3828 - accuracy: 0.8112 - val_loss: 0.3863 - val_accuracy: 0.8167
Epoch 24/50
40/40 - 1s - loss: 0.3810 - accuracy: 0.8185 - val_loss: 0.3837 - val_accuracy: 0.8222
Epoch 25/50
40/40 - 1s - loss: 0.3801 - accuracy: 0.8127 - val_loss: 0.3851 - val_accuracy: 0.8230
Epoch 26/50
40/40 - 1s - loss: 0.3746 - accuracy: 0.8188 - val_loss: 0.3826 - val_accuracy: 0.8277
Epoch 27/50
40/40 - 1s - loss: 0.3742 - accuracy: 0.8196 - val_loss: 0.3837 - val_accuracy: 0.8261
Epoch 28/50
40/40 - 1s - loss: 0.3717 - accuracy: 0.8220 - val_loss: 0.3826 - val_accuracy: 0.8245
Epoch 29/50
40/40 - 1s - loss: 0.3661 - accuracy: 0.8249 - val_loss: 0.3804 - val_accuracy: 0.8253
Epoch 30/50
40/40 - 1s - loss: 0.3680 - accuracy: 0.8232 - val_loss: 0.3828 - val_accuracy: 0.8222
Epoch 31/50
40/40 - 1s - loss: 0.3601 - accuracy: 0.8279 - val_loss: 0.3815 - val_accuracy: 0.8222
Epoch 32/50
40/40 - 1s - loss: 0.3527 - accuracy: 0.8301 - val_loss: 0.3812 - val_accuracy: 0.8214
Epoch 33/50
40/40 - 1s - loss: 0.3521 - accuracy: 0.8362 - val_loss: 0.3835 - val_accuracy: 0.8277
Epoch 34/50
40/40 - 1s - loss: 0.3526 - accuracy: 0.8312 - val_loss: 0.3845 - val_accuracy: 0.8198
Epoch 35/50
40/40 - 1s - loss: 0.3406 - accuracy: 0.8365 - val_loss: 0.3832 - val_accuracy: 0.8198
Epoch 36/50
40/40 - 1s - loss: 0.3444 - accuracy: 0.8338 - val_loss: 0.3806 - val_accuracy: 0.8253
Epoch 37/50
40/40 - 1s - loss: 0.3396 - accuracy: 0.8342 - val_loss: 0.3888 - val_accuracy: 0.8214
Epoch 38/50
40/40 - 1s - loss: 0.3379 - accuracy: 0.8391 - val_loss: 0.3845 - val_accuracy: 0.8190
Epoch 39/50
40/40 - 1s - loss: 0.3296 - accuracy: 0.8483 - val_loss: 0.3911 - val_accuracy: 0.8183
Epoch 40/50
40/40 - 1s - loss: 0.3289 - accuracy: 0.8423 - val_loss: 0.3852 - val_accuracy: 0.8245
Epoch 41/50
40/40 - 1s - loss: 0.3277 - accuracy: 0.8440 - val_loss: 0.3863 - val_accuracy: 0.8238
Epoch 42/50
40/40 - 1s - loss: 0.3198 - accuracy: 0.8497 - val_loss: 0.3839 - val_accuracy: 0.8206
Epoch 43/50
40/40 - 1s - loss: 0.3134 - accuracy: 0.8556 - val_loss: 0.3894 - val_accuracy: 0.8127
Epoch 44/50
40/40 - 1s - loss: 0.3155 - accuracy: 0.8537 - val_loss: 0.3890 - val_accuracy: 0.8198
Epoch 45/50
40/40 - 1s - loss: 0.3109 - accuracy: 0.8531 - val_loss: 0.3933 - val_accuracy: 0.8214
Epoch 46/50
40/40 - 1s - loss: 0.3115 - accuracy: 0.8554 - val_loss: 0.3914 - val_accuracy: 0.8183
Epoch 47/50
40/40 - 1s - loss: 0.3033 - accuracy: 0.8558 - val_loss: 0.3889 - val_accuracy: 0.8127
Epoch 48/50
40/40 - 1s - loss: 0.3002 - accuracy: 0.8578 - val_loss: 0.3889 - val_accuracy: 0.8198
Epoch 49/50
40/40 - 1s - loss: 0.2972 - accuracy: 0.8605 - val_loss: 0.3963 - val_accuracy: 0.8230
Epoch 50/50
40/40 - 1s - loss: 0.2941 - accuracy: 0.8645 - val_loss: 0.3952 - val_accuracy: 0.8159
plot(history5)
../_images/dl-simple-case_76_0.png
model5.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.3842 - accuracy: 0.8232
[0.3841722905635834, 0.8231592178344727]
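
To verify the LSTM return values described in the comments above, a quick shape check with random inputs (a sketch):

demo_x = tf.random.normal((1, max_len, EMBEDDING_DIM))
all_h, last_h, last_c = layers.LSTM(64, return_sequences=True, return_state=True)(demo_x)
print(all_h.shape, last_h.shape, last_c.shape)   # (1, 15, 64) (1, 64) (1, 64)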

Model 6

  • Adding an Attention layer

    • Use the hidden state h and the cell state c of the last time step

    • Compute the attention between them

    • And use [attention output + hidden state h of the last time step] for the final decision

EMBEDDING_DIM = 128

inputs = keras.Input(shape=(max_len,))
x = layers.Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len)(inputs)
#x = layers.SpatialDropout1D(0.2)(x)
x_all_hs, x_last_h, x_last_c = layers.LSTM(64, dropout=0.2,
                                           recurrent_dropout=0.2,
                                           return_sequences=True, return_state=True)(x)
## LSTM Parameters:
#     `return_sequences=True`: return the hidden states of every time step
#     `return_state=True`: additionally return the hidden and cell states of the last time step
#     When both are set True, the return values of the LSTM are:
#     (1) the hidden states of all time steps (or, with `return_sequences=False`,
#         just the hidden state of the last time step)
#     (2) the hidden state of the last time step
#     (3) the cell state of the last time step


atten_out = layers.Attention()([x_last_h, x_last_c])

x = layers.Concatenate(axis=1)([x_last_h, atten_out])
outputs = layers.Dense(2, activation='softmax')(x)
model6 = keras.Model(inputs=inputs, outputs=outputs, name="name_gender_attention_model")

plot_model(model6, show_shapes=True)
../_images/dl-simple-case_79_0.png
model6.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
history6 = model6.fit(X_train, y_train, 
                    batch_size=128, 
                    epochs=50, verbose=2,
                   validation_split = 0.2)
Epoch 1/50
40/40 - 4s - loss: 0.6362 - accuracy: 0.6290 - val_loss: 0.5719 - val_accuracy: 0.6994
Epoch 2/50
40/40 - 1s - loss: 0.5220 - accuracy: 0.7400 - val_loss: 0.4661 - val_accuracy: 0.7710
Epoch 3/50
40/40 - 1s - loss: 0.4529 - accuracy: 0.7791 - val_loss: 0.4409 - val_accuracy: 0.7852
Epoch 4/50
40/40 - 1s - loss: 0.4442 - accuracy: 0.7891 - val_loss: 0.4220 - val_accuracy: 0.8072
Epoch 5/50
40/40 - 1s - loss: 0.4356 - accuracy: 0.7911 - val_loss: 0.4156 - val_accuracy: 0.8065
Epoch 6/50
40/40 - 1s - loss: 0.4285 - accuracy: 0.7921 - val_loss: 0.4137 - val_accuracy: 0.8135
Epoch 7/50
40/40 - 2s - loss: 0.4239 - accuracy: 0.7992 - val_loss: 0.4093 - val_accuracy: 0.8065
Epoch 8/50
40/40 - 1s - loss: 0.4203 - accuracy: 0.8007 - val_loss: 0.4077 - val_accuracy: 0.8049
Epoch 9/50
40/40 - 1s - loss: 0.4159 - accuracy: 0.8037 - val_loss: 0.4048 - val_accuracy: 0.8104
Epoch 10/50
40/40 - 1s - loss: 0.4128 - accuracy: 0.7968 - val_loss: 0.3992 - val_accuracy: 0.8190
Epoch 11/50
40/40 - 1s - loss: 0.4117 - accuracy: 0.8019 - val_loss: 0.3989 - val_accuracy: 0.8159
Epoch 12/50
40/40 - 1s - loss: 0.4055 - accuracy: 0.8033 - val_loss: 0.3954 - val_accuracy: 0.8096
Epoch 13/50
40/40 - 1s - loss: 0.4047 - accuracy: 0.8063 - val_loss: 0.3966 - val_accuracy: 0.8104
Epoch 14/50
40/40 - 1s - loss: 0.4010 - accuracy: 0.8053 - val_loss: 0.3889 - val_accuracy: 0.8135
Epoch 15/50
40/40 - 1s - loss: 0.3976 - accuracy: 0.8070 - val_loss: 0.4063 - val_accuracy: 0.8088
Epoch 16/50
40/40 - 1s - loss: 0.3968 - accuracy: 0.8108 - val_loss: 0.3864 - val_accuracy: 0.8159
Epoch 17/50
40/40 - 1s - loss: 0.3942 - accuracy: 0.8080 - val_loss: 0.3876 - val_accuracy: 0.8175
Epoch 18/50
40/40 - 1s - loss: 0.3927 - accuracy: 0.8102 - val_loss: 0.3878 - val_accuracy: 0.8159
Epoch 19/50
40/40 - 1s - loss: 0.3900 - accuracy: 0.8110 - val_loss: 0.3831 - val_accuracy: 0.8190
Epoch 20/50
40/40 - 1s - loss: 0.3873 - accuracy: 0.8124 - val_loss: 0.3852 - val_accuracy: 0.8198
Epoch 21/50
40/40 - 1s - loss: 0.3870 - accuracy: 0.8159 - val_loss: 0.3841 - val_accuracy: 0.8206
Epoch 22/50
40/40 - 1s - loss: 0.3825 - accuracy: 0.8135 - val_loss: 0.3849 - val_accuracy: 0.8285
Epoch 23/50
40/40 - 1s - loss: 0.3818 - accuracy: 0.8167 - val_loss: 0.3838 - val_accuracy: 0.8261
Epoch 24/50
40/40 - 1s - loss: 0.3784 - accuracy: 0.8143 - val_loss: 0.3814 - val_accuracy: 0.8198
Epoch 25/50
40/40 - 1s - loss: 0.3761 - accuracy: 0.8181 - val_loss: 0.3830 - val_accuracy: 0.8222
Epoch 26/50
40/40 - 1s - loss: 0.3721 - accuracy: 0.8179 - val_loss: 0.3831 - val_accuracy: 0.8167
Epoch 27/50
40/40 - 1s - loss: 0.3694 - accuracy: 0.8232 - val_loss: 0.3800 - val_accuracy: 0.8222
Epoch 28/50
40/40 - 1s - loss: 0.3672 - accuracy: 0.8226 - val_loss: 0.3806 - val_accuracy: 0.8285
Epoch 29/50
40/40 - 1s - loss: 0.3635 - accuracy: 0.8204 - val_loss: 0.3800 - val_accuracy: 0.8206
Epoch 30/50
40/40 - 1s - loss: 0.3618 - accuracy: 0.8259 - val_loss: 0.3791 - val_accuracy: 0.8190
Epoch 31/50
40/40 - 1s - loss: 0.3569 - accuracy: 0.8279 - val_loss: 0.3787 - val_accuracy: 0.8245
Epoch 32/50
40/40 - 1s - loss: 0.3591 - accuracy: 0.8222 - val_loss: 0.3848 - val_accuracy: 0.8183
Epoch 33/50
40/40 - 1s - loss: 0.3548 - accuracy: 0.8253 - val_loss: 0.3820 - val_accuracy: 0.8190
Epoch 34/50
40/40 - 1s - loss: 0.3483 - accuracy: 0.8348 - val_loss: 0.3763 - val_accuracy: 0.8190
Epoch 35/50
40/40 - 1s - loss: 0.3448 - accuracy: 0.8336 - val_loss: 0.3839 - val_accuracy: 0.8120
Epoch 36/50
40/40 - 1s - loss: 0.3437 - accuracy: 0.8360 - val_loss: 0.3803 - val_accuracy: 0.8214
Epoch 37/50
40/40 - 1s - loss: 0.3406 - accuracy: 0.8362 - val_loss: 0.3818 - val_accuracy: 0.8190
Epoch 38/50
40/40 - 1s - loss: 0.3364 - accuracy: 0.8424 - val_loss: 0.3863 - val_accuracy: 0.8159
Epoch 39/50
40/40 - 1s - loss: 0.3352 - accuracy: 0.8389 - val_loss: 0.3820 - val_accuracy: 0.8198
Epoch 40/50
40/40 - 1s - loss: 0.3291 - accuracy: 0.8440 - val_loss: 0.3865 - val_accuracy: 0.8261
Epoch 41/50
40/40 - 1s - loss: 0.3251 - accuracy: 0.8444 - val_loss: 0.3829 - val_accuracy: 0.8190
Epoch 42/50
40/40 - 1s - loss: 0.3231 - accuracy: 0.8503 - val_loss: 0.3869 - val_accuracy: 0.8167
Epoch 43/50
40/40 - 1s - loss: 0.3236 - accuracy: 0.8499 - val_loss: 0.3828 - val_accuracy: 0.8190
Epoch 44/50
40/40 - 1s - loss: 0.3160 - accuracy: 0.8470 - val_loss: 0.3805 - val_accuracy: 0.8167
Epoch 45/50
40/40 - 1s - loss: 0.3134 - accuracy: 0.8521 - val_loss: 0.3829 - val_accuracy: 0.8261
Epoch 46/50
40/40 - 1s - loss: 0.3105 - accuracy: 0.8529 - val_loss: 0.3856 - val_accuracy: 0.8198
Epoch 47/50
40/40 - 1s - loss: 0.3086 - accuracy: 0.8554 - val_loss: 0.3934 - val_accuracy: 0.8238
Epoch 48/50
40/40 - 1s - loss: 0.3059 - accuracy: 0.8556 - val_loss: 0.3894 - val_accuracy: 0.8230
Epoch 49/50
40/40 - 1s - loss: 0.3001 - accuracy: 0.8572 - val_loss: 0.3910 - val_accuracy: 0.8167
Epoch 50/50
40/40 - 1s - loss: 0.2988 - accuracy: 0.8548 - val_loss: 0.3875 - val_accuracy: 0.8253
plot(history6)
../_images/dl-simple-case_81_0.png

Explanation

from lime.lime_text import LimeTextExplainer

## Use LIME to see which characters push a prediction toward each class
## (class 0 = female, class 1 = male, matching the labels defined earlier)
explainer = LimeTextExplainer(class_names=['female','male'], char_level=True)
def model_predict_pipeline(text):
    _seq = tokenizer.texts_to_sequences(text)
    _seq_pad = keras.preprocessing.sequence.pad_sequences(_seq, maxlen=max_len)
    #return np.array([[float(1-x), float(x)] for x in model.predict(np.array(_seq_pad))])
    return model6.predict(np.array(_seq_pad))



reversed_word_index = {index: word for (word, index) in tokenizer.word_index.items()}
text_id = 305
X_test[text_id]
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 24.,  7.,  5.,
        1., 13.], dtype=float32)
X_test_texts[text_id]
'Zorah'
' '.join([reversed_word_index.get(i, '?') for i in X_test[text_id]])
'? ? ? ? ? ? ? ? ? ? z o r a h'
print(X_test[22])
print(X_test_texts[22])
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  4. 10.  5.  2. 11.]
Andrey
model_predict_pipeline([X_test_texts[text_id]])
array([[0.7218352, 0.2781648]], dtype=float32)
exp = explainer.explain_instance(
    X_test_texts[text_id], model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
y_test[text_id]
0
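
Besides the notebook widget, the explanation weights can also be read programmatically (a sketch using LIME's Explanation API):

top_label = exp.available_labels()[0]
exp.as_list(label=top_label)   # (character, weight) pairs for the top predicted label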
exp = explainer.explain_instance(
    'Alvin', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
    'Michaelis', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
    'Sidney', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
    'Timber', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)